## title:   Narcissism in Political Participation
## authors: Z. Fazekas & P.K. Hatemi
## study:   US 2015
## goal:    clean and prepare covariates
##          clean and prepare outcome variables
##          clean and prepare narcissism variables
##          measurement summary
## ----
## 0. packages (helper.R is sourced in section 1) --
library("psych")
library("dplyr")
library("stringr")
library("semTools")

## 1. load helper functions and the raw survey data
source("helper.R")
us <- read.csv("./data/us15.csv")

## 2. covariates --
## NOTE: the order in which columns are added matters; the export step
## selects the range female:turnout_2014 by position.

## gender indicator: the raw gender code shifted down by one
us$female <- us$gender - 1
## dichotomous education: 1 when educ is above 3, 0 otherwise (NA stays NA)
us$edu_cat <- as.numeric(us$educ > 3)
## age: survey year (2015) minus year of birth
us$age <- 2015 - us$birthyr

## ethnicity/race: 1 for any race code above 1, 0 otherwise (NA stays NA)
us$not_caucasian <- as.numeric(us$race > 1)

## political interest: code 7 is treated as missing; the remaining codes are
## reversed and divided by 3 (so a code of 1 becomes 1 and a code of 4
## becomes 0)
us$interest <- ifelse(us$newsint == 7, NA, us$newsint)
us$interest <- (4 - us$interest) / 3

## 3. participation --

## the eight participation items; same items as in Danish study
part_vars <- paste0("polpartic_", 1:8)

## Code 5 is treated as missing. The recode is applied column by column with
## lapply() so the data frame is never coerced to a matrix (which apply()
## would do, forcing all columns to one common type).
us[part_vars] <- lapply(us[part_vars],
                        function(x) car::recode(x, "5 = NA"))
## reverse the remaining codes and divide by 3 (code 1 -> 1, code 4 -> 0)
us[part_vars] <- (4 - us[part_vars]) / 3
psych::alpha(us[, part_vars]) ## 0.89 std.alpha, all +

## within-individual mean participation score across the items; the divisor
## is taken from the item list instead of being hard-coded. rowSums() keeps
## its default na.rm = FALSE, so a respondent with any missing item gets NA.
us$part_full <- rowSums(us[, part_vars]) / length(part_vars)

## narcissism --
## the 40 PI items; the raw codes are shifted down by one
npi_vars <- sprintf("PI%d", 1:40)
us[npi_vars] <- us[npi_vars] - 1

## item positions to reverse, based on codebook
to_reverse <- c(1, 2, 3, 6, 8, 11, 12, 13, 14, 16, 21, 24, 25, 27,
                29, 30, 31, 33, 34, 36, 37, 38, 39)
reversed_items <- npi_vars[to_reverse]
us[reversed_items] <- 1 - us[reversed_items]

## reliability of the full 40-item scale
psych::alpha(us[, npi_vars], check.keys = TRUE) ## all +, 0.86 alpha

## item sets of the seven subscales
aut_vars <- paste0("PI", c(1, 8, 10, 11, 12, 32, 33, 36)) ## Authority
exh_vars <- paste0("PI", c(2, 3, 7, 20, 28, 30, 38))      ## Exhibitionism (not in DK)
sup_vars <- paste0("PI", c(4, 9, 26, 37, 40))             ## Superiority
exp_vars <- paste0("PI", c(6, 13, 16, 23, 35))            ## Exploitativeness
ent_vars <- paste0("PI", c(5, 14, 18, 24, 25, 27))        ## Entitlement
van_vars <- paste0("PI", c(15, 19, 29))                   ## Vanity
suf_vars <- paste0("PI", c(17, 21, 22, 31, 34, 39))       ## Self-sufficiency

## reliability per subscale (values noted by the original authors)
psych::alpha(us[, aut_vars]) ## 0.76 reliability
psych::alpha(us[, exh_vars]) ## 0.71 reliability
psych::alpha(us[, sup_vars]) ## 0.57 reliability
psych::alpha(us[, exp_vars]) ## 0.55 reliability
psych::alpha(us[, ent_vars]) ## 0.47 reliability
psych::alpha(us[, van_vars]) ## 0.61 reliability
psych::alpha(us[, suf_vars]) ## 0.41 reliability

## 3-factor variables: item sets of the three-factor (25-item) solution
lead_auth   <- paste0("PI", c(1, 5, 10, 11, 12, 27, 32, 33, 34, 36, 40))
grand_exhib <- paste0("PI", c(4, 7, 15, 19, 20, 26, 28, 29, 30, 38))
ent_exp     <- paste0("PI", c(13, 14, 24, 25))
## all 25 items, in factor order
npi25_vars <- c(lead_auth, grand_exhib, ent_exp)

## reliability per factor and for the 25 items together
## (values noted by the original authors)
psych::alpha(us[, lead_auth]) ## 0.76 reliability
psych::alpha(us[, grand_exhib]) ## 0.75 reliability
psych::alpha(us[, ent_exp]) ## 0.43 reliability
psych::alpha(us[, npi25_vars]) ## 0.82 reliability

## 7-factor model, CFA check
## Each line of the model syntax defines one latent factor by its items.
## note (original authors): npi22 (presumably item PI22 -- confirm) is not
## sig. related, kept in
npi.factor <- 'auth =~ PI1 + PI8 + PI10 + PI11 + PI12 + PI32 + PI33 + PI36
               exhib =~ PI2 + PI3 + PI7 + PI20 + PI28 + PI30 + PI38
                 sup =~ PI4 + PI9 + PI26 + PI37 + PI40
                 exp =~ PI6 + PI13 + PI16 + PI23 + PI35
                 ent =~ PI5 + PI14 + PI18 + PI24 + PI25 + PI27
                 van =~ PI15 + PI19 + PI29
                 suf =~ PI17 + PI21 + PI22 + PI31 + PI34 + PI39'
## all 40 items are declared as ordered variables
npi.factor.fit <- cfa(
  model = npi.factor,
  data = us,
  ordered = npi_vars
)
summary(npi.factor.fit, fit.measures = TRUE)

# check the 3-factor model (25 items)
npi.3factor <- 'lead_auth =~ PI1 + PI5 + PI10 + PI11 + PI12 + PI27 + PI32 + PI33 + PI34 + PI36 + PI40
                 grand_exh =~ PI4 + PI7 + PI15 + PI19 + PI20 + PI26 + PI28 + PI29 + PI30 + PI38
                 ent_exp   =~ PI13 + PI14 + PI24 + PI25'
npi.3factor.fit <- cfa(
  model = npi.3factor,
  data = us,
  ordered = npi_vars
)
summary(npi.3factor.fit, standardized = TRUE, fit.measures = TRUE)


# extract 7 and 3-factor results

# Build a table of standardized factor loadings from a fitted CFA model.
#
#   fit            fitted model handed to standardizedSolution()
#   factor_labels  named character vector; names are the latent variable
#                  names used in the model syntax, values are the display
#                  labels, given in the order wanted for the factor levels
#
# Returns a data.frame with columns lhs (factor of display labels), rhs
# ("Item<k>"), est.std and se (both rounded to 3 decimals) and label
# ("est.std (se)").
loading_table <- function(fit, factor_labels) {
  sol <- data.frame(standardizedSolution(fit))
  # Pick the loading rows by operator and the columns by name, rather than
  # relying on the loadings being the first rows of the solution.
  sol <- sol[sol$op == "=~", c("lhs", "rhs", "est.std", "se")]
  sol[, c("est.std", "se")] <- round(sol[, c("est.std", "se")], 3)
  # A standard error that rounds to 0 is reported as 0.001, unless the
  # loading itself is exactly 1. The full column name est.std is used here:
  # `$est` only worked through partial matching. which() drops NA
  # comparisons.
  rounds_to_zero <- which(sol$se == 0 & sol$est.std != 1)
  sol$se[rounds_to_zero] <- 0.001
  sol$rhs <- stringr::str_replace(sol$rhs, "PI", "Item")
  # Exact lookup of the display label; avoids chained substring replacement,
  # where an earlier replacement could be hit by a later pattern.
  sol$lhs <- factor(unname(factor_labels[as.character(sol$lhs)]),
                    levels = unname(factor_labels))
  sol$label <- paste0(sol$est.std, " (", sol$se, ")")
  sol
}

# 7-factor (40 items)
fit_40 <- loading_table(
  npi.factor.fit,
  c(auth  = "Authority",
    exhib = "Exhibitionism",
    sup   = "Superiority",
    exp   = "Exploitativeness",
    ent   = "Entitlement",
    van   = "Vanity",
    suf   = "Self-sufficiency")
)
fit_40_us15 <- fit_40 %>%
  group_by(lhs) %>%
  dplyr::select(lhs, rhs, label)

# 3-factor (25 items)
fit_25 <- loading_table(
  npi.3factor.fit,
  c(lead_auth = "Leadership/Authority",
    grand_exh = "Grandiose/Exhibitionism",
    ent_exp   = "Entitlement/Exploitativeness")
)
fit_25_us15 <- fit_25 %>%
  group_by(lhs) %>%
  dplyr::select(lhs, rhs, label)

fit_25_us15
fit_40_us15

# Mean of a set of item columns per respondent (row sum divided by the
# number of items). rowSums() keeps its default na.rm = FALSE, so a
# respondent with any missing item gets NA.
item_mean <- function(data, items) {
  rowSums(data[, items]) / length(items)
}

# "mean (sd)" summary string, both rounded to two decimals, ignoring NAs
mean_sd_label <- function(x) {
  paste0(round(mean(x, na.rm = TRUE), 2),
         " (", round(sd(x, na.rm = TRUE), 2), ")")
}

# full NPI score and the seven subscale scores; list order is the order in
# which the columns are appended to the data
npi40_scales <- list(
  npi        = npi_vars,
  authority  = aut_vars,
  exhib      = exh_vars,
  exploit    = exp_vars,
  entitle    = ent_vars,
  sufficient = suf_vars,
  vanity     = van_vars,
  superior   = sup_vars
)
for (scale_name in names(npi40_scales)) {
  us[[scale_name]] <- item_mean(us, npi40_scales[[scale_name]])
}

# store out narcissism variable names
narc_vars <- c("npi", "authority", "exhib", "superior",
               "exploit", "entitle", "vanity", "sufficient")

# 25-item score and the three factor scores
npi25_scales <- list(
  npi25      = npi25_vars,
  leadauth   = lead_auth,
  entexp     = ent_exp,
  grandexhib = grand_exhib
)
for (scale_name in names(npi25_scales)) {
  us[[scale_name]] <- item_mean(us, npi25_scales[[scale_name]])
}

# store out narcissism variable names
narc25_vars <- c("npi25", "leadauth", "grandexhib", "entexp")

# means/SD
vapply(us[narc_vars], mean_sd_label, character(1))

vapply(us[narc25_vars], mean_sd_label, character(1))


## turnout

## 2012 presidential elections: presvote codes below 4 count as voted (1),
## code 6 as not voted (0), everything else (including missing) is NA
us$turnout_2012 <- ifelse(us$presvote < 4, 1,
                          ifelse(us$presvote == 6, 0, NA))
## respondents with age below 22 are set to missing (presumably not yet
## eligible in 2012 -- confirm); which() leaves rows with unknown age as is
us$turnout_2012[which(us$age < 22)] <- NA

## 2014 midterm: midterm code 1 is voted (1), code 2 is not voted (0),
## everything else (including missing) is NA
us$turnout_2014 <- ifelse(us$midterm == 1, 1,
                          ifelse(us$midterm == 2, 0, NA))
## respondents with age below 20 are set to missing (presumably not yet
## eligible in 2014 -- confirm)
us$turnout_2014[which(us$age < 20)] <- NA


## variable recoding and export for regression
## keep only variables of future interest: the two identifiers/weights plus
## every column positioned from female through turnout_2014
us <- us %>%
  dplyr::select(caseid, weight, female:turnout_2014)

## 2SD standardization of variables of interest
## narcissism scores (40-item scales first, then the 25-item scales)
narc_comp <- c(
  "npi", "authority", "exploit", "sufficient",
  "entitle", "vanity", "superior", "exhib",
  narc25_vars
)
to_recode <- c(narc_comp, "age")
## standardized copies get the suffix "_sd"
rec_vars <- paste0(to_recode, "_sd")

## two_sd() is not defined in this file (presumably it comes from helper.R);
## it is applied to each selected column in turn
us[, rec_vars] <- apply(us[, to_recode], 2, two_sd)

## add study ID
us$study <- "US15"
write.csv(x = us, file = "./data/us15-reg.csv", row.names = FALSE)

# R version 3.5.1 (2018-07-02)
# Platform: x86_64-apple-darwin15.6.0 (64-bit)
# Running under: macOS  10.15.4
# 
# Matrix products: default
# BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
# LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
# 
# locale:
#   [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
# 
# attached base packages:
#   [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# other attached packages:
#   [1] semTools_0.5-1 lavaan_0.6-3   stringr_1.4.0  dplyr_0.8.3    psych_1.8.12  
# 
# loaded via a namespace (and not attached):
#   [1] zip_1.0.0         Rcpp_1.0.2        cellranger_1.1.0  pillar_1.4.2      compiler_3.5.1    forcats_0.4.0    
# [7] tools_3.5.1       zeallot_0.1.0     tibble_2.1.3      nlme_3.1-137      lattice_0.20-35   pkgconfig_2.0.3  
# [13] rlang_0.4.2       openxlsx_4.1.0    cli_1.1.0         rstudioapi_0.10   curl_3.3          yaml_2.2.0       
# [19] parallel_3.5.1    pbivnorm_0.6.0    haven_2.2.0       rio_0.5.16        vctrs_0.2.1       hms_0.5.3        
# [25] stats4_3.5.1      grid_3.5.1        tidyselect_0.2.5  glue_1.3.1        data.table_1.11.8 R6_2.4.0         
# [31] fansi_0.4.0       readxl_1.3.1      foreign_0.8-70    carData_3.0-2     purrr_0.3.3       car_3.0-2        
# [37] magrittr_1.5      backports_1.1.5   abind_1.4-5       assertthat_0.2.1  mnormt_1.5-5      utf8_1.1.4       
# [43] stringi_1.4.3     crayon_1.3.4  
